# NOTE: this script used to start with `rm(list = ls())`. That call silently
# deletes every object in the caller's global environment, so it was removed.
# Run the script in a fresh R session if a clean workspace is needed.
library(tidyverse)

# Basic scatterplot of hwy against displ; aesthetics are set in the layer.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

# Appears to give the same plot: here the aesthetics are set globally in
# ggplot() and inherited by geom_point().
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

######################################################################
# ggplot(data = <DATA>) + <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>)) #
######################################################################
# <DATA>          : the data set to plot
# <GEOM_FUNCTION> : the geom function (geom_point() etc.)
# <MAPPINGS>      : the variables to plot

# The same template applied to class and drv.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = class, y = drv))
# Aesthetics: map a third variable to a visual property inside aes().

# color = class: changes the colour of the points by class.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))

# size = class: changes the size of the points by class.
# Using size for discrete data is not a good idea, and you get a warning:
#   Warning: Using size for a discrete variable is not advised.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, size = class))

# alpha = class: changes the transparency of the points by class, so they
# appear in different shades from light to dark.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))

# shape = class: changes the shape of the points by class.
# Additional groups go unplotted when the number of classes exceeds the
# default number of shapes (6); class has 7 values here, so ggplot2 warns:
#   Warning: The shape palette can deal with a maximum of 6 discrete values
#   because more than 6 becomes difficult to discriminate; you have 7.
#   Consider specifying shapes manually if you must have them.
#   Warning: Removed 62 rows containing missing values (geom_point).
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, shape = class))

# color = "blue" changes the colour of ALL points.
# Note that `color =` is OUTSIDE aes() here: it sets a fixed value instead
# of mapping a variable.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "blue")

# color = cyl: for continuous variables you can use color or size, but not
# shape.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = cyl))

# stroke controls the width of the border for point shapes that have one
# (shapes 21-25); it does not change the border colour.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, stroke = 6))

# An aesthetic can also be mapped to an expression: points are coloured by
# whether displ < 5 is TRUE or FALSE.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = displ < 5))
# Facets: split one plot into panels, one per subset of the data.

# facet_wrap() on a single variable, panels laid out in 2 rows.
# The variable that you pass to facet_wrap() should be discrete.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)

# Same facets, laid out in 3 rows.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 3)

# facet_grid() facets the plot on the combination of two variables:
# one panel of x vs y for each (drv, cyl) pair, drv in rows and cyl in
# columns.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)

# Divide based on one variable by row: one row of panels per drv value.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)

# Divide based on one variable by column: one column of panels per drv
# value.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(. ~ drv)
# Geometric objects: geom_smooth() and combining several layers.
# Every geom_smooth() call below was recorded printing the message
#   `geom_smooth()` using method = 'loess'

# A single smooth line fitted to all of the data.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))

# linetype = drv: separates the cars into three lines based on their drv
# value, each with a different linetype.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))

# group = drv: separates the cars into three lines based on their drv
# value, all with the same linetype.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy, group = drv))

# Global x/y mappings shared by both layers; only the points are separated
# by colour based on class.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth() +
  geom_point(mapping = aes(color = class))

# The local data argument in geom_smooth()
# (= filter(mpg, class == "subcompact")) overrides the global data argument
# of ggplot() for that layer only, so the smooth line is drawn only for the
# rows with class == "subcompact".
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_smooth(data = filter(mpg, class == "subcompact"), se = FALSE) +
  geom_point(mapping = aes(color = class))

# color = drv set globally: separates both the smooth lines and the points
# with different colours based on drv.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
# To hide a legend, show.legend = FALSE should be passed to the layer
# (geom_point(), geom_smooth(), etc.).

# Every geom_smooth() call below was recorded printing the message
#   `geom_smooth()` using method = 'loess'

# Points plus one blue smooth line, without the confidence band.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(color = "blue", se = FALSE)

# Same, with a red smooth line.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(color = "red", se = FALSE)

# One blue smooth line per drv group; points are not grouped.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(color = "blue", se = FALSE, aes(group = drv))

# Points and smooth lines both coloured by drv (global mapping).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)

# Only the points are coloured by drv; a single smooth line for all data.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv)) +
  geom_smooth(se = FALSE)

# Incorrect version (author's note; presumably a failed attempt at the plot
# that follows -- TODO confirm). The smooth line uses only rows with
# drv == "r".
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE, data = filter(mpg, drv == "r"))

# Points coloured by drv, smooth lines distinguished by linetype per drv.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv)) +
  geom_smooth(se = FALSE, mapping = aes(linetype = drv))

# Shape 21 has a separate fill and border: fill by drv, white border.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, fill = drv)) +
  geom_point(color = "white", shape = 21)

# Better? Same plot with larger points.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, fill = drv)) +
  geom_point(color = "white", shape = 21, size = 4)
# Statistical transformations.

# The two plots below are the same.
ggplot(data = diamonds, aes(x = cut)) +
  stat_count()
ggplot(data = diamonds, aes(x = cut)) +
  geom_bar()

# A small table whose bar heights are already computed.
demo <- tribble(
  ~a,      ~b,
  "bar_1", 20,
  "bar_2", 30,
  "bar_3", 40
)
# ggplot(demo, aes(x = a, y = b)) + geom_bar() # this does not work
# stat = "identity" uses the values in b directly as bar heights.
ggplot(demo, aes(x = a, y = b)) +
  geom_bar(stat = "identity")

# stacked bar plot
# (note kept from the original; it is unclear which plot it refers to)

# Summary of depth for each cut: a point at the median with a bar whose
# length spans the minimum and maximum.
# NOTE(review): ggplot2 >= 3.3.0 deprecates fun.ymin / fun.ymax / fun.y in
# favour of fun.min / fun.max / fun; the old names are kept here so the
# script still runs on the ggplot2 version it was written against.
ggplot(data = diamonds) +
  stat_summary(
    mapping = aes(x = cut, y = depth),
    fun.ymin = min,
    fun.ymax = max,
    fun.y = median
  )

# The same kind of plot built from the geom side: geom_pointrange() with
# stat = "summary".
ggplot(data = diamonds) +
  geom_pointrange(
    mapping = aes(x = cut, y = depth),
    stat = "summary",
    fun.ymin = min,
    fun.ymax = max,
    fun.y = mean # you can use median instead of mean
  )

ggplot(data = diamonds, aes(x = cut, y = depth)) +
  geom_col()
# the default stat for geom_col: identity
# the default stat for geom_bar: count

# stat_smooth() draws the same layer as geom_smooth(); recorded message:
#   `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  stat_smooth()

# Proportions instead of counts.
# NOTE(review): ggplot2 >= 3.4.0 deprecates the ..prop.. notation in favour
# of after_stat(prop); the old notation is kept for compatibility.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = ..prop..))
# this does not give what we want

ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))
# this works
# if `group` is not set to 1 or whatever, all the bars have prop == 1
# group = "whatever" is a "dummy" grouping to override the default behavior
# for detail, see here ->
# http://stackoverflow.com/questions/39878813/r-ggplot-geom-bar-meaning-of-aesgroup-1

ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = ..prop.., fill = color, group = color))
# this works too
# Colouring bars and position adjustments.

# color = cut colours the bar outlines.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, color = cut))

# fill = cut colours the bar interiors.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = cut))

ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, group = cut)) # same as without `group = `

# position = "identity": the bars for each clarity are drawn exactly where
# they fall, so they overlap. Outlines only (fill = NA) ...
ggplot(data = diamonds, mapping = aes(x = cut, color = clarity)) +
  geom_bar(fill = NA, position = "identity")

# ... or semi-transparent fills (alpha = .2) keep the overlaps visible.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(alpha = .2, position = "identity")

# position = "fill":
# * each set of stacked bars has the same height
# * hence it is easy to compare the proportions across groups (across the
#   different values of cut)
# * every bar has the same height and the y axis is a proportion, so the
#   share of each clarity is easy to compare cut by cut
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# Same, but with coloured outlines instead of fills: hard to read.
ggplot(data = diamonds, mapping = aes(x = cut, color = clarity)) +
  geom_bar(position = "fill")

# position = "dodge":
# * places overlapping objects directly beside one another
# * this makes it easier to compare individual values
# * bars that would overlap with "identity" are lined up side by side, so
#   nothing is covered
# * the y axis is the count, not a proportion
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")
# position = "jitter" adds random noise to each point.
# It deals with overplotting, i.e. the problem of identical values being
# drawn on top of each other.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), position = "jitter")

# cty vs hwy with jitter ...
ggplot(data = mpg) +
  geom_point(mapping = aes(x = cty, y = hwy), position = "jitter")

# ... and without, for comparison.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = cty, y = hwy))

# geom_jitter() is a shortcut for geom_point(position = "jitter").
# Use `width` and `height` to control the amount of jitter:
# `width` for horizontal jitter and `height` for vertical jitter.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_jitter()

# width = 20, height = 20: too much noise.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_jitter(width = 20, height = 20)

# Default jitter again, to compare with geom_count() below.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_jitter()

# geom_count() is another way to handle overplotting: it counts the
# observations at each location and maps the count to point size.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_count()

# Boxplots of hwy by drv, split by class (outline colour).
ggplot(data = mpg, aes(x = drv, y = hwy, color = class)) +
  geom_boxplot()

ggplot(data = mpg, aes(x = drv, y = hwy, color = class)) +
  geom_boxplot(position = "identity") # conveys little information because of overlaps

# Same split using fill instead of outline colour.
ggplot(data = mpg, aes(x = drv, y = hwy, fill = class)) +
  geom_boxplot()
# compare the figures above

# coord_flip() switches the x and y axes.
ggplot(data = mpg, aes(x = drv, y = hwy, fill = class)) +
  geom_boxplot() +
  coord_flip()

ggplot(data = mpg, aes(x = drv, y = hwy, color = class)) +
  geom_boxplot() +
  coord_flip()

# Bar charts of drv, split by class via outline colour ...
ggplot(data = mpg, aes(x = drv, color = class)) +
  geom_bar()

# ... and via group only (no visual distinction between the classes).
ggplot(data = mpg, aes(x = drv, group = class)) +
  geom_bar()
# Coordinate systems.

# map_data() needs the maps package. Attaching it was recorded printing:
#   Attaching package: 'maps'
#   The following object is masked from 'package:purrr':
#       map
library(maps)

# Outline of New Zealand drawn as polygons.
nz <- map_data("nz")
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

# Bar chart of cut, prepared for polar coordinates.
bar <- ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) + # `aspect.ratio` sets the ratio of vertical to horizontal size
  labs(x = NULL, y = NULL)
# The result is only assigned, not printed; evaluate `bar` to draw it.
bar <- bar + coord_polar()

# Stacked bar chart in polar coordinates; x (drv) is mapped to the angle.
ggplot(mpg, aes(x = drv, fill = class)) +
  geom_bar() +
  coord_polar()

# theta = "y": the bar height (count) is mapped to the angle instead.
ggplot(mpg, aes(x = drv, fill = class)) +
  geom_bar() +
  coord_polar(theta = "y")

# answer?
ggplot(mpg, aes(x = factor(1), fill = class)) +
  geom_bar() # displayed as counts

# Fixed: the argument was misspelled `positon`, so ggplot2 ignored it with
# "Warning: Ignoring unknown parameters: positon" and still showed counts.
ggplot(mpg, aes(x = factor(1), fill = class)) +
  geom_bar(position = "fill") # displayed as proportions

ggplot(mpg, aes(x = drv, fill = class)) +
  geom_bar(position = "fill") +
  coord_polar()

ggplot(mpg, aes(x = drv, fill = class)) +
  geom_bar(position = "fill") +
  coord_polar(theta = "y")
# theta = "y" maps the y axis (bar height) to the angle, so the variable
#   given in `x =` goes to the radius
# theta = "x" (the default) maps the variable given in `x =` to the angle

# Map projections: adding either coord slightly changes the drawn shape.
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon() +
  coord_quickmap() # if you add this, the shape slightly changes

# NOTE(review): coord_map() relies on the mapproj package being installed.
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon() +
  coord_map() # if you add this, the shape slightly changes

ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() + # reference line y = x (default slope 1, intercept 0)
  coord_fixed() # ensure that the abline is at a 45 degree angle